@inproceedings{conf/edbtw/VakaliPD04,
	author = {Vakali, Athena and Pokorny, Jaroslav and Dalamagas, Theodore},
	title = {An Overview of {Web} Data Clustering Practices},
	editor = {Lindner, Wolfgang and Mesiti, Marco and T{\"u}rker, Can and Tzitzikas, Yannis and Vakali, Athena},
	booktitle = {{EDBT} Workshops},
	series = {Lecture Notes in Computer Science},
	volume = {3268},
	pages = {597--606},
	publisher = {Springer},
	year = {2004},
	isbn = {3-540-23305-9},
	keywords = {Web Data Clustering},
	abstract = {Clustering is a challenging topic in the area of Web data management. Various forms of clustering are required in a wide range of applications, including finding mirrored Web pages, detecting copyright violations, and reporting search results in a structured way. Clustering can either be performed once offline, (independently to search queries), or online (on the results of search queries). Important efforts have focused on mining Web access logs and to cluster search engine results on the fly. Online methods based on link structure and text have been applied successfully to finding pages on related topics. This paper presents an overview of the most popular methodologies and implementations in terms of clustering either Web users or Web sources and presents a survey about current status and future trends in clustering employed over the Web.}
}
% NOTE(review): key 1809 duplicates conf/edbtw/VakaliPD04 (identical title, year
% and abstract). The bibliographic fields below were copied from that entry so
% this key has the author and booktitle that inproceedings requires. Confirm
% both keys denote the same publication, then merge them into one entry.
@inproceedings{1809,
	author = {Vakali, Athena and Pokorny, Jaroslav and Dalamagas, Theodore},
	title = {An Overview of {Web} Data Clustering Practices},
	editor = {Lindner, Wolfgang and Mesiti, Marco and T{\"u}rker, Can and Tzitzikas, Yannis and Vakali, Athena},
	booktitle = {{EDBT} Workshops},
	series = {Lecture Notes in Computer Science},
	volume = {3268},
	pages = {597--606},
	publisher = {Springer},
	year = {2004},
	isbn = {3-540-23305-9},
	keywords = {Web Data Clustering},
	abstract = {Clustering is a challenging topic in the area of Web data management. Various forms of clustering are required in a wide range of applications, including finding mirrored Web pages, detecting copyright violations, and reporting search results in a structured way. Clustering can either be performed once offline, (independently to search queries), or online (on the results of search queries). Important efforts have focused on mining Web access logs and to cluster search engine results on the fly. Online methods based on link structure and text have been applied successfully to finding pages on related topics. This paper presents an overview of the most popular methodologies and implementations in terms of clustering either Web users or Web sources and presents a survey about current status and future trends in clustering employed over the Web.}
}
% NOTE(review): author and booktitle (required for inproceedings) are not given
% for this entry; TODO fill them in from the publication record.
% The abstract was restored from a garbled extraction (lost hyphens, ff/fi
% ligatures and sentence punctuation); verify against the published text.
@inproceedings{1842,
	title = {An Object-Based Approach for Effective {XML} Data Storage},
	year = {2001},
	abstract = {XML data storage is a critical issue due to the so-called I/O bottleneck problem emerged in nowadays computer systems. This paper presents an object-based XML data representation model towards effective XML data placement. The proposed representation of XML documents is analysed in a two-level scheme: the external level is based on the structure of a browsing graph whereas the internal level is supported by a tree-like structure. The main contribution of the paper is that it exploits the object data model in order to consider XML data dependencies, access frequencies and constraints. A simulation model has been developed in order to evaluate different XML data placement strategies and the impact of the proposed representation model in the overall storage process. XML data placement is applied on a tertiary storage subsystem by either constructive or iterative placement techniques. Three popular policies (the Organ-pipe, the Camel and the Simulated Annealing algorithms) have been considered and experiments have been carried out on synthetic workloads of XML data sets. The need of applying an XML data storage policy is apparent as indicated by the resulted improvements in seek and service times. The Simulated Annealing approach has been proven to outperform the other XML data placement strategies.}
}